package com.yk.weibo.servie.content.impl;

import com.yk.weibo.model.SinaModel;
import com.yk.weibo.process.Main;
import com.yk.weibo.servie.content.api.IContentService;
import org.json.JSONArray;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.sql.Timestamp;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Project Name:weiboCrawler
 * File Name:ContentServiceImpl
 * Package Name:com.yk.weibo.servie.content.impl
 * Date:2018/1/9 18:07
 * Author:zhangshaoyang
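 * Description: IContentService implementation that fetches a page over HTTP using a cookie and parses its "WB_feed_type" elements into SinaModel objects.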
 * Copyright (c) 2018, 重庆云凯科技有限公司 All Rights Reserved.
 */
@Service("contentService")
public class ContentServiceImpl implements IContentService {

    /**
     * Absolute time text: digits-digits-digits, a 1-10 character separator, digits:digits
     * (presumably "yyyy-MM-dd HH:mm" — confirm against the live page).
     */
    private static final Pattern ABSOLUTE_TIME = Pattern.compile("\\d*-\\d*-\\d*.{1,10}\\d*:\\d*");

    /** Relative time text "N分钟前" (N minutes ago); group 1 is N. */
    private static final Pattern MINUTES_AGO = Pattern.compile("(\\d{1,2})分钟前");

    /** Relative time text "N秒前" (N seconds ago); group 1 is N. */
    private static final Pattern SECONDS_AGO = Pattern.compile("(\\d{1,2})秒前");

    /** A string made only of ASCII digits. */
    private static final Pattern DIGITS = Pattern.compile("[0-9]*");

    /**
     * Downloads the body of {@code url} with a GET request that carries the given cookie.
     *
     * <p>The proxy configured in {@code Main.ip}/{@code Main.port} is applied first. Lines are
     * concatenated without their line terminators. Any status other than 200 yields an empty
     * buffer.
     *
     * @param cookie value sent in the {@code Cookie} request header
     * @param url    page to fetch
     * @return the response body, or an empty buffer when the status code is not 200
     * @throws IOException if the connection cannot be opened or read
     */
    @Override
    public StringBuffer getContent(String cookie, URL url) throws IOException {
        getProxy(Main.ip, Main.port);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setRequestProperty("Cookie", cookie);
        connection.setRequestMethod("GET");

        StringBuffer buffer = new StringBuffer();
        if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
            return buffer;
        }
        // try-with-resources closes the reader (and the underlying stream) even when reading fails.
        // The charset is pinned so decoding does not depend on the platform default.
        // NOTE(review): assumes the fetched pages are UTF-8 encoded — confirm.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                buffer.append(line);
            }
        }
        return buffer;
    }

    /**
     * Installs the given HTTP proxy as JVM-wide system properties.
     *
     * <p>Nothing is changed unless both values are non-null and non-empty.
     * NOTE(review): only the {@code http.*} properties are set; confirm whether
     * {@code https.*} is needed for the URLs being fetched.
     *
     * @param ip   proxy host
     * @param port proxy port
     */
    private void getProxy(String ip, String port) {
        if (ip == null || port == null || ip.isEmpty() || port.isEmpty()) {
            return;
        }
        System.setProperty("http.proxySet", "true");
        System.setProperty("http.proxyHost", ip);
        System.setProperty("http.proxyPort", port);
    }

    /**
     * Parses an HTML page and converts every element with class {@code WB_feed_type}
     * into a {@link SinaModel}.
     *
     * @param html page markup
     * @return one model per matching element, in document order; empty if none match
     */
    @Override
    public List<SinaModel> analyzeContent(String html) {
        Document document = Jsoup.parse(html);
        Elements feedItems = document.getElementsByClass("WB_feed_type");
        List<SinaModel> models = new ArrayList<>(feedItems.size());
        for (Element feedItem : feedItems) {
            models.add(this.analyzeSingleContent(feedItem));
        }
        return models;
    }

    /**
     * Extracts user name, publish time, equipment, text, links, image links and the three
     * counters from a single feed element.
     *
     * @param e feed element; it must contain elements with classes {@code W_face_radius},
     *          {@code WB_from}, {@code WB_text} and {@code WB_row_line}
     * @return the populated model
     * @throws IndexOutOfBoundsException if one of the required elements or children is missing
     * @throws IllegalArgumentException  if a non-absolute, non-relative time text cannot be
     *                                   parsed, or a counter does not fit in an {@code int}
     */
    @Override
    public SinaModel analyzeSingleContent(Element e) {
        SinaModel model = new SinaModel();
        model.setUsername(e.getElementsByClass("W_face_radius").get(0).attr("title"));

        // Publish time and (optionally) the equipment text are children of the WB_from element.
        Element fromElement = e.getElementsByClass("WB_from").get(0);
        Timestamp publishTime = parsePublishTime(fromElement.child(0).text());
        if (publishTime != null) {
            model.setPublishTime(publishTime);
        }
        if (fromElement.children().size() >= 2) {
            model.setEquipment(fromElement.child(1).text());
        }

        // Text content with all whitespace removed, plus the href of every direct child.
        Element textElement = e.getElementsByClass("WB_text").get(0);
        model.setContent(textElement.text().replaceAll("\\s+", ""));
        Elements textChildren = textElement.children();
        if (!textChildren.isEmpty()) {
            JSONArray links = new JSONArray();
            for (Element child : textChildren) {
                String href = child.attr("href");
                if (href != null && !href.isEmpty()) {
                    links.put(href);
                }
            }
            model.setContentlink(links.toString());
        }

        // Image sources from the first media_box; the link array is always set, possibly empty.
        JSONArray imageLinks = new JSONArray();
        Elements mediaBoxes = e.getElementsByClass("media_box");
        if (!mediaBoxes.isEmpty()) {
            for (Element image : mediaBoxes.get(0).getElementsByTag("img")) {
                String src = image.attr("src");
                if (src != null && !src.isEmpty()) {
                    imageLinks.put(src);
                }
            }
        }
        model.setImglink(imageLinks.toString());

        // Children 1, 2 and 3 of the first WB_row_line hold the transmit, comment and like counters.
        Elements actionCells = e.getElementsByClass("WB_row_line").get(0).children();
        model.setTransmitnum(parseCount(actionCells.get(1).text()));
        model.setCommentnum(parseCount(actionCells.get(2).text()));
        model.setLikenum(parseCount(actionCells.get(3).text()));
        return model;
    }

    /**
     * Converts the time text shown on a feed element into a timestamp.
     *
     * <p>Handles, in order: absolute text matching {@link #ABSOLUTE_TIME}, "N分钟前",
     * "N秒前", and otherwise text using "月"/"日"/"今天", which is completed with the
     * current year (and current month and day for "今天").
     * NOTE(review): the current time is read in the JVM default time zone — confirm it
     * matches the zone the page uses.
     *
     * @param rawTime time text taken from the page
     * @return the parsed time, or {@code null} when absolute-looking text cannot be parsed
     * @throws IllegalArgumentException if the fallback format cannot be parsed
     */
    private Timestamp parsePublishTime(String rawTime) {
        if (ABSOLUTE_TIME.matcher(rawTime).matches()) {
            try {
                return Timestamp.valueOf(rawTime + ":00");
            } catch (IllegalArgumentException ignored) {
                // The pattern is looser than Timestamp's format; leave the time unset
                // rather than fail the whole element.
                return null;
            }
        }

        // Relative times are computed at whole-second precision.
        long nowSeconds = System.currentTimeMillis() / 1000;
        Matcher minutesAgo = MINUTES_AGO.matcher(rawTime);
        if (minutesAgo.matches()) {
            long minutes = Long.parseLong(minutesAgo.group(1));
            return new Timestamp((nowSeconds - minutes * 60) * 1000);
        }
        Matcher secondsAgo = SECONDS_AGO.matcher(rawTime);
        if (secondsAgo.matches()) {
            long seconds = Long.parseLong(secondsAgo.group(1));
            return new Timestamp((nowSeconds - seconds) * 1000);
        }

        // One clock reading so year, month and day come from the same instant.
        Calendar now = Calendar.getInstance();
        String today = (now.get(Calendar.MONTH) + 1) + "-" + now.get(Calendar.DATE);
        String normalized = rawTime.replace("月", "-").replace("日", "").replace("今天", today);
        return Timestamp.valueOf(now.get(Calendar.YEAR) + "-" + normalized + ":00");
    }

    /**
     * Reads a counter from cell text by keeping only its digits.
     *
     * @param text cell text
     * @return the number formed by the digits in {@code text}, or 0 when it contains none
     */
    private Integer parseCount(String text) {
        String digits = text.replaceAll("\\D", "");
        return isNum(digits) ? Integer.valueOf(digits) : Integer.valueOf(0);
    }

    /**
     * Tells whether a string is a non-empty sequence of ASCII digits.
     *
     * @param str candidate string, may be {@code null}
     * @return {@code true} only for a non-null, non-empty, all-digit string
     */
    private boolean isNum(String str) {
        return str != null && !str.isEmpty() && DIGITS.matcher(str).matches();
    }

}
